{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pandas-数据清洗和准备"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "内容介绍:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "d    0\n",
      "b    1\n",
      "c    2\n",
      "a    3\n",
      "e    4\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>B</th>\n",
       "      <th>A</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>-4</td>\n",
       "      <td>-2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>-1</td>\n",
       "      <td>-9</td>\n",
       "      <td>-7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>-9</td>\n",
       "      <td>-3</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>-8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   B  A  C\n",
       "d -4 -2  1\n",
       "b -1 -9 -7\n",
       "c -9 -3  2\n",
       "a  4  7 -8"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 示例数据\n",
    "s0 = pd.Series(range(5),index=['d','b','c','a','e'])\n",
    "print(s0)\n",
    "df0 = pd.DataFrame(np.random.randint(-9,9,size=(4,3)),index=['d','b','c','a'],columns=['B','A','C'])\n",
    "df0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.缺失值处理\n",
    "\n",
    "笔记15#针对缺失值的处理流程:（1）发现缺失值、（2）丢弃缺失值、（3）填充/替换缺失值。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1-(2)丢弃缺失值\n",
    "\n",
    "dropna(self, axis=0, how='any', thresh=None, subset=None, inplace=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>B</th>\n",
       "      <th>A</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>-4.0</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>-9.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>-8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     B    A    C\n",
       "d -4.0 -2.0  1.0\n",
       "b  NaN  NaN  NaN\n",
       "c -9.0  NaN  2.0\n",
       "a  4.0  7.0 -8.0"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df0.loc['c','A'] = None\n",
    "df0.loc['b'] = None\n",
    "df0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>B</th>\n",
       "      <th>A</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>-4.0</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>-9.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>-8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     B    A    C\n",
       "d -4.0 -2.0  1.0\n",
       "c -9.0  NaN  2.0\n",
       "a  4.0  7.0 -8.0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#关于丢弃缺失值的补充方法：丢弃整行为Nan的行\n",
    "# how : {'any', 'all'}, default 'any'\n",
    "df0.dropna(how='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>B</th>\n",
       "      <th>A</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>-4.0</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>-9.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>-8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     B    A    C\n",
       "d -4.0 -2.0  1.0\n",
       "b  NaN  NaN  NaN\n",
       "c -9.0  NaN  2.0\n",
       "a  4.0  7.0 -8.0"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#按照列删除全是nan值的列\n",
    "#由于没有全是nan的列，那么没有删除任何列\n",
    "df0.dropna(axis=1,how='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>B</th>\n",
       "      <th>A</th>\n",
       "      <th>C</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>-4.0</td>\n",
       "      <td>-2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>4.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>-8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     B    A    C\n",
       "d -4.0 -2.0  1.0\n",
       "a  4.0  7.0 -8.0"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#针对某一列去除nan值使用，subset参数\n",
    "df0.dropna(subset=['A'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.864100</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.771696</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.406316</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.533904</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.727263</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.727816</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.767004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.847191</td>\n",
       "      <td>0.499481</td>\n",
       "      <td>0.548473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.770602</td>\n",
       "      <td>0.358454</td>\n",
       "      <td>0.519032</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2\n",
       "0  0.864100       NaN       NaN\n",
       "1  0.771696       NaN       NaN\n",
       "2  0.406316       NaN       NaN\n",
       "3  0.533904       NaN  0.727263\n",
       "4  0.727816       NaN  0.767004\n",
       "5  0.847191  0.499481  0.548473\n",
       "6  0.770602  0.358454  0.519032"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1=pd.DataFrame(np.random.rand(7,3))\n",
    "df1.loc[:4,1]=np.nan\n",
    "df1.loc[:2,2]=np.nan\n",
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.533904</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.727263</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.727816</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.767004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.847191</td>\n",
       "      <td>0.499481</td>\n",
       "      <td>0.548473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.770602</td>\n",
       "      <td>0.358454</td>\n",
       "      <td>0.519032</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2\n",
       "3  0.533904       NaN  0.727263\n",
       "4  0.727816       NaN  0.767004\n",
       "5  0.847191  0.499481  0.548473\n",
       "6  0.770602  0.358454  0.519032"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# thresh : int, optional\n",
    "#        Require that many non-NA values.\n",
    "# 指定达到缺失值的数量才能删除\n",
    "df1.dropna(thresh=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.847191</td>\n",
       "      <td>0.499481</td>\n",
       "      <td>0.548473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.770602</td>\n",
       "      <td>0.358454</td>\n",
       "      <td>0.519032</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2\n",
       "5  0.847191  0.499481  0.548473\n",
       "6  0.770602  0.358454  0.519032"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#dropna函数，不改变原数据的索引号\n",
    "df1.dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.864100</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.771696</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.406316</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.533904</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.727263</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.727816</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.767004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.847191</td>\n",
       "      <td>0.499481</td>\n",
       "      <td>0.548473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.770602</td>\n",
       "      <td>0.358454</td>\n",
       "      <td>0.519032</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2\n",
       "0  0.864100       NaN       NaN\n",
       "1  0.771696       NaN       NaN\n",
       "2  0.406316       NaN       NaN\n",
       "3  0.533904       NaN  0.727263\n",
       "4  0.727816       NaN  0.767004\n",
       "5  0.847191  0.499481  0.548473\n",
       "6  0.770602  0.358454  0.519032"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#drop()函数不改变元数据\n",
    "df1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1-（3）缺失值填充"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.792499</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.555113</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.578940</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.669352</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.937611</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.329004</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.443623</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.501356</td>\n",
       "      <td>0.122923</td>\n",
       "      <td>0.587066</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.666037</td>\n",
       "      <td>0.617876</td>\n",
       "      <td>0.434711</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2\n",
       "0  0.792499  0.000000  0.000000\n",
       "1  0.555113  0.000000  0.000000\n",
       "2  0.578940  0.000000  0.000000\n",
       "3  0.669352  0.000000  0.937611\n",
       "4  0.329004  0.000000  0.443623\n",
       "5  0.501356  0.122923  0.587066\n",
       "6  0.666037  0.617876  0.434711"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#不添加其他参数的填充方法\n",
    "df1.fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.864100</td>\n",
       "      <td>0.900000</td>\n",
       "      <td>0.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.771696</td>\n",
       "      <td>0.900000</td>\n",
       "      <td>0.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.406316</td>\n",
       "      <td>0.900000</td>\n",
       "      <td>0.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.533904</td>\n",
       "      <td>0.900000</td>\n",
       "      <td>0.727263</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.727816</td>\n",
       "      <td>0.900000</td>\n",
       "      <td>0.767004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.847191</td>\n",
       "      <td>0.499481</td>\n",
       "      <td>0.548473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.770602</td>\n",
       "      <td>0.358454</td>\n",
       "      <td>0.519032</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2\n",
       "0  0.864100  0.900000  0.800000\n",
       "1  0.771696  0.900000  0.800000\n",
       "2  0.406316  0.900000  0.800000\n",
       "3  0.533904  0.900000  0.727263\n",
       "4  0.727816  0.900000  0.767004\n",
       "5  0.847191  0.499481  0.548473\n",
       "6  0.770602  0.358454  0.519032"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#针对某个列空缺值填充不同的数值的方法\n",
    "df1.fillna({1:0.9,2:0.8})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.864100</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.771696</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.406316</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.533904</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.727263</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.727816</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.767004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.847191</td>\n",
       "      <td>0.499481</td>\n",
       "      <td>0.548473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.770602</td>\n",
       "      <td>0.358454</td>\n",
       "      <td>0.519032</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2\n",
       "0  0.864100       NaN       NaN\n",
       "1  0.771696       NaN       NaN\n",
       "2  0.406316       NaN       NaN\n",
       "3  0.533904       NaN  0.727263\n",
       "4  0.727816       NaN  0.767004\n",
       "5  0.847191  0.499481  0.548473\n",
       "6  0.770602  0.358454  0.519032"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.864100</td>\n",
       "      <td>0.900000</td>\n",
       "      <td>0.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.771696</td>\n",
       "      <td>0.900000</td>\n",
       "      <td>0.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.406316</td>\n",
       "      <td>0.900000</td>\n",
       "      <td>0.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.533904</td>\n",
       "      <td>0.900000</td>\n",
       "      <td>0.727263</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.727816</td>\n",
       "      <td>0.900000</td>\n",
       "      <td>0.767004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.847191</td>\n",
       "      <td>0.499481</td>\n",
       "      <td>0.548473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.770602</td>\n",
       "      <td>0.358454</td>\n",
       "      <td>0.519032</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2\n",
       "0  0.864100  0.900000  0.800000\n",
       "1  0.771696  0.900000  0.800000\n",
       "2  0.406316  0.900000  0.800000\n",
       "3  0.533904  0.900000  0.727263\n",
       "4  0.727816  0.900000  0.767004\n",
       "5  0.847191  0.499481  0.548473\n",
       "6  0.770602  0.358454  0.519032"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# fillna函数默认操作时不对原数据进行更改。需要针对源数据更改添加参数inplace=True\n",
    "df2=df1.copy()\n",
    "df2.fillna({1:0.9,2:0.8})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.864100</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.771696</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.406316</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.533904</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.727263</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.727816</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.767004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.847191</td>\n",
       "      <td>0.499481</td>\n",
       "      <td>0.548473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.770602</td>\n",
       "      <td>0.358454</td>\n",
       "      <td>0.519032</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2\n",
       "0  0.864100       NaN       NaN\n",
       "1  0.771696       NaN       NaN\n",
       "2  0.406316       NaN       NaN\n",
       "3  0.533904       NaN  0.727263\n",
       "4  0.727816       NaN  0.767004\n",
       "5  0.847191  0.499481  0.548473\n",
       "6  0.770602  0.358454  0.519032"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.434808</td>\n",
       "      <td>-0.319881</td>\n",
       "      <td>1.019222</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.019279</td>\n",
       "      <td>-1.243475</td>\n",
       "      <td>-0.466217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.532758</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.273716</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.633720</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.773500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1.104303</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.480561</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2\n",
       "0 -1.434808 -0.319881  1.019222\n",
       "1  0.019279 -1.243475 -0.466217\n",
       "2  0.532758       NaN -0.273716\n",
       "3  1.633720       NaN  0.773500\n",
       "4 -1.104303       NaN       NaN\n",
       "5  0.480561       NaN       NaN"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3 = pd.DataFrame(np.random.randn(6,3))\n",
    "df3.iloc[2:,1] = np.nan\n",
    "df3.iloc[4:,2] = np.nan\n",
    "df3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.434808</td>\n",
       "      <td>-0.319881</td>\n",
       "      <td>1.019222</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.019279</td>\n",
       "      <td>-1.243475</td>\n",
       "      <td>-0.466217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.532758</td>\n",
       "      <td>-1.243475</td>\n",
       "      <td>-0.273716</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.633720</td>\n",
       "      <td>-1.243475</td>\n",
       "      <td>0.773500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1.104303</td>\n",
       "      <td>-1.243475</td>\n",
       "      <td>0.773500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.480561</td>\n",
       "      <td>-1.243475</td>\n",
       "      <td>0.773500</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2\n",
       "0 -1.434808 -0.319881  1.019222\n",
       "1  0.019279 -1.243475 -0.466217\n",
       "2  0.532758 -1.243475 -0.273716\n",
       "3  1.633720 -1.243475  0.773500\n",
       "4 -1.104303 -1.243475  0.773500\n",
       "5  0.480561 -1.243475  0.773500"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 按照列的缺失值最上面一个有效值填充\n",
    "df3.fillna(method='ffill')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.434808</td>\n",
       "      <td>-0.319881</td>\n",
       "      <td>1.019222</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.019279</td>\n",
       "      <td>-1.243475</td>\n",
       "      <td>-0.466217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.532758</td>\n",
       "      <td>-1.243475</td>\n",
       "      <td>-0.273716</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.633720</td>\n",
       "      <td>-1.243475</td>\n",
       "      <td>0.773500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-1.104303</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.773500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.480561</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.773500</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2\n",
       "0 -1.434808 -0.319881  1.019222\n",
       "1  0.019279 -1.243475 -0.466217\n",
       "2  0.532758 -1.243475 -0.273716\n",
       "3  1.633720 -1.243475  0.773500\n",
       "4 -1.104303       NaN  0.773500\n",
       "5  0.480561       NaN  0.773500"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#按照列上的缺失值上最后一个有效值填充，设置向下填充的个数\n",
    "df3.fillna(method='ffill',limit=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Help on function dropna in module pandas.core.frame:\n",
      "\n",
      "dropna(self, axis: 'Axis' = 0, how: 'str' = 'any', thresh=None, subset=None, inplace: 'bool' = False)\n",
      "    Remove missing values.\n",
      "    \n",
      "    See the :ref:`User Guide <missing_data>` for more on which values are\n",
      "    considered missing, and how to work with missing data.\n",
      "    \n",
      "    Parameters\n",
      "    ----------\n",
      "    axis : {0 or 'index', 1 or 'columns'}, default 0\n",
      "        Determine if rows or columns which contain missing values are\n",
      "        removed.\n",
      "    \n",
      "        * 0, or 'index' : Drop rows which contain missing values.\n",
      "        * 1, or 'columns' : Drop columns which contain missing value.\n",
      "    \n",
      "        .. versionchanged:: 1.0.0\n",
      "    \n",
      "           Pass tuple or list to drop on multiple axes.\n",
      "           Only a single axis is allowed.\n",
      "    \n",
      "    how : {'any', 'all'}, default 'any'\n",
      "        Determine if row or column is removed from DataFrame, when we have\n",
      "        at least one NA or all NA.\n",
      "    \n",
      "        * 'any' : If any NA values are present, drop that row or column.\n",
      "        * 'all' : If all values are NA, drop that row or column.\n",
      "    \n",
      "    thresh : int, optional\n",
      "        Require that many non-NA values.\n",
      "    subset : array-like, optional\n",
      "        Labels along other axis to consider, e.g. if you are dropping rows\n",
      "        these would be a list of columns to include.\n",
      "    inplace : bool, default False\n",
      "        If True, do operation inplace and return None.\n",
      "    \n",
      "    Returns\n",
      "    -------\n",
      "    DataFrame or None\n",
      "        DataFrame with NA entries dropped from it or None if ``inplace=True``.\n",
      "    \n",
      "    See Also\n",
      "    --------\n",
      "    DataFrame.isna: Indicate missing values.\n",
      "    DataFrame.notna : Indicate existing (non-missing) values.\n",
      "    DataFrame.fillna : Replace missing values.\n",
      "    Series.dropna : Drop missing values.\n",
      "    Index.dropna : Drop missing indices.\n",
      "    \n",
      "    Examples\n",
      "    --------\n",
      "    >>> df = pd.DataFrame({\"name\": ['Alfred', 'Batman', 'Catwoman'],\n",
      "    ...                    \"toy\": [np.nan, 'Batmobile', 'Bullwhip'],\n",
      "    ...                    \"born\": [pd.NaT, pd.Timestamp(\"1940-04-25\"),\n",
      "    ...                             pd.NaT]})\n",
      "    >>> df\n",
      "           name        toy       born\n",
      "    0    Alfred        NaN        NaT\n",
      "    1    Batman  Batmobile 1940-04-25\n",
      "    2  Catwoman   Bullwhip        NaT\n",
      "    \n",
      "    Drop the rows where at least one element is missing.\n",
      "    \n",
      "    >>> df.dropna()\n",
      "         name        toy       born\n",
      "    1  Batman  Batmobile 1940-04-25\n",
      "    \n",
      "    Drop the columns where at least one element is missing.\n",
      "    \n",
      "    >>> df.dropna(axis='columns')\n",
      "           name\n",
      "    0    Alfred\n",
      "    1    Batman\n",
      "    2  Catwoman\n",
      "    \n",
      "    Drop the rows where all elements are missing.\n",
      "    \n",
      "    >>> df.dropna(how='all')\n",
      "           name        toy       born\n",
      "    0    Alfred        NaN        NaT\n",
      "    1    Batman  Batmobile 1940-04-25\n",
      "    2  Catwoman   Bullwhip        NaT\n",
      "    \n",
      "    Keep only the rows with at least 2 non-NA values.\n",
      "    \n",
      "    >>> df.dropna(thresh=2)\n",
      "           name        toy       born\n",
      "    1    Batman  Batmobile 1940-04-25\n",
      "    2  Catwoman   Bullwhip        NaT\n",
      "    \n",
      "    Define in which columns to look for missing values.\n",
      "    \n",
      "    >>> df.dropna(subset=['name', 'toy'])\n",
      "           name        toy       born\n",
      "    1    Batman  Batmobile 1940-04-25\n",
      "    2  Catwoman   Bullwhip        NaT\n",
      "    \n",
      "    Keep the DataFrame with valid entries in the same variable.\n",
      "    \n",
      "    >>> df.dropna(inplace=True)\n",
      "    >>> df\n",
      "         name        toy       born\n",
      "    1  Batman  Batmobile 1940-04-25\n",
      "\n"
     ]
    }
   ],
   "source": [
    "help(pd.DataFrame.dropna)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
